1. what are networks?

  • simply put, a network is a collection of connected objects. networks represent interrelationships between actors of all sorts. networks focus on the relationships between actors.

  • components: nodes/vertices, links/edges. “We refer to the objects as nodes or vertices, and usually draw them as points. We refer to the connections between the nodes as edges, and usually draw them as lines between points.”

network

network

different types of networks

different types of networks

  • structure of network data: adjacency matrix or edgelist

adjacency matrix edgelist

2. software and packages

software:

r:

3. network analysis in r

3.1 read and process sample data

# Packages used throughout this tutorial.
pkgs <- c("tidyverse", "igraph", "ggraph", "tidygraph")
# Any package that is not installed yet has to be installed first with
# install.packages("X").
# Attach every package in `pkgs`; character.only = TRUE makes library() take
# the package name from the string it is handed.
ld_pkgs <- lapply(X = pkgs, FUN = library, character.only = TRUE)


# Load the sample tweet data.
df <- read_csv(file = "1. sample twitter data (SCOTUS).csv")

# Weighted edgelist: one row per (retweeting user, retweeted user) pair, with
# `weight` counting how many rows of `df` share that pair.
# Rows whose retweet_user_handle is the literal string \N are dropped
# (presumably the export's marker for "not a retweet" -- confirm against the data).
edges <- df %>%
  filter(retweet_user_handle != "\\N") %>%
  transmute(from = user_handle, to = retweet_user_handle) %>%
  count(from, to) %>%
  rename(weight = n)

head(edges)
## # A tibble: 6 x 3
##   from            to           weight
##   <chr>           <chr>         <int>
## 1 ___Danno        ctvqp             1
## 2 __Dayo          KHOU              1
## 3 __Resonance     The_Tusker        1
## 4 _1_____________ Reuters           1
## 5 _Alibama        BreakingNews      1
## 6 _Arvindh        FactHive          1
#nodes and their attributes
#node list: every handle that appears as a retweeter (from) or as a retweeted user (to)
nodes <- tibble(handle = unique(c(edges$from, edges$to)))
#out degree: number of rows of `edges` in which the handle is the source
#stringsAsFactors = FALSE keeps handle as character, so the joins below match
#character to character instead of coercing a factor
temp_out <- as.data.frame(table(edges$from), stringsAsFactors = FALSE)
colnames(temp_out) <- c("handle", "out_degree")
#in degree: number of rows of `edges` in which the handle is the target
temp_in <- as.data.frame(table(edges$to), stringsAsFactors = FALSE)
colnames(temp_in) <- c("handle", "in_degree")
#merge; a handle missing from a degree table has degree 0
#only the two degree columns are filled (never `handle`), and they are filled
#before high_indegree is derived so the flag is never NA
nodes <- nodes %>%
  left_join(temp_out, by = "handle") %>%
  left_join(temp_in, by = "handle") %>%
  mutate(
    out_degree = coalesce(as.numeric(out_degree), 0),
    in_degree = coalesce(as.numeric(in_degree), 0),
    high_indegree = ifelse(in_degree > 29, 1, 0)
  )

head(nodes)
## # A tibble: 6 x 4
##   handle          out_degree in_degree high_indegree
##   <chr>                <dbl>     <dbl>         <dbl>
## 1 ___Danno                 1         0             0
## 2 __Dayo                   1         0             0
## 3 __Resonance              1         0             0
## 4 _1_____________          1         0             0
## 5 _Alibama                 1         0             0
## 6 _Arvindh                 1         0             0

3.2 ggraph

ggraph is an extension of ggplot2, so you can apply the ggplot2 grammar you already know to quickly learn how ggraph works.

central parts of ggraph:

  • Layouts

  • Nodes

  • Edges

create ggraph object

# Build the graph object that ggraph will plot from the node and edge tables;
# edges are treated as directed.
rt_net <- tbl_graph(
  nodes = nodes,
  edges = edges,
  directed = TRUE
)

# Check the object's class, then print it to see the node and edge tables.
class(rt_net)
## [1] "tbl_graph" "igraph"
rt_net
## # A tbl_graph: 9470 nodes and 7139 edges
## #
## # A directed simple graph with 2377 components
## #
## # Node Data: 9,470 x 4 (active)
##   handle          out_degree in_degree high_indegree
##   <chr>                <dbl>     <dbl>         <dbl>
## 1 ___Danno                 1         0             0
## 2 __Dayo                   1         0             0
## 3 __Resonance              1         0             0
## 4 _1_____________          1         0             0
## 5 _Alibama                 1         0             0
## 6 _Arvindh                 1         0             0
## # … with 9,464 more rows
## #
## # Edge Data: 7,139 x 3
##    from    to weight
##   <int> <int>  <int>
## 1     1  6624      1
## 2     2  6625      1
## 3     3  6626      1
## # … with 7,136 more rows

visualize the graph

# Minimal plot: edges as straight lines, then nodes as points drawn on top.
# No layout is named, so ggraph chooses one itself.
ggraph(graph = rt_net) +
  geom_edge_link() +
  geom_node_point()
## Using `nicely` as default layout

# Styled plot: edge width follows the retweet weight, node size follows
# in-degree, node colour separates high in-degree accounts from the rest,
# and only the high in-degree accounts get a (repelled) label.
ggraph(graph = rt_net, layout = "nicely") +
  geom_edge_link(mapping = aes(width = weight), alpha = 0.8) +
  scale_edge_width(range = c(0.2, 1)) +
  geom_node_point(
    mapping = aes(color = as.factor(high_indegree), size = in_degree)
  ) +
  geom_node_label(
    mapping = aes(filter = high_indegree == 1, label = handle),
    repel = TRUE
  )

you can specify layout: https://igraph.org/r/doc/layout_.html

you can even use the facet function: ggraph facet

3.3 igraph

create an igraph object

#construct the igraph object
#graph_from_data_frame() is the current name of the deprecated graph.data.frame():
#`d` is the edge table and `vertices` the node table; the extra columns of each
#table are stored as edge / vertex attributes
g <- graph_from_data_frame(d = edges, vertices = nodes, directed = TRUE)

#other ways to construct the igraph object:
#graph_from_adjacency_matrix, graph_from_edgelist; see: https://igraph.org/r/doc/


#inspect the igraph object
#class() returns the object's class vector
class(g)
## [1] "igraph"
#printing g shows a summary line, the vertex/edge attributes and the first edges
g
## IGRAPH 3d4ef0b DNW- 9470 7139 -- 
## + attr: name (v/c), out_degree (v/n), in_degree (v/n),
## | high_indegree (v/n), weight (e/n)
## + edges from 3d4ef0b (vertex names):
##  [1] ___Danno       ->ctvqp           __Dayo         ->KHOU           
##  [3] __Resonance    ->The_Tusker      _1_____________->Reuters        
##  [5] _Alibama       ->BreakingNews    _Arvindh       ->FactHive       
##  [7] _BannedInBoston->Occupied_Nation _bee_buzz_     ->MARGAlade      
##  [9] _chynadoll_    ->WSJ             _CJustice_     ->peoplefor      
## [11] _clairehuxtable->OBABL           _eights        ->jgalicot       
## [13] _Ella_G        ->HuffingtonPost  _filo__        ->TheAtlantic    
## + ... omitted several edges
#g[] gives the adjacency matrix of g (a sparse matrix, see the output below);
#show rows 100-110 and columns 1-5 of it
g[][100:110, 1:5]
## 11 x 5 sparse Matrix of class "dgCMatrix"
##                ___Danno __Dayo __Resonance _1_____________ _Alibama
## 7NewsBrisbane         .      .           .               .        .
## 7thgenyang            .      .           .               .        .
## 8s                    .      .           .               .        .
## 900chuhay             .      .           .               .        .
## 905GoTrainGirl        .      .           .               .        .
## 99islandsummer        .      .           .               .        .
## 99Pele                .      .           .               .        .
## A_amusa               .      .           .               .        .
## a_d_wood              .      .           .               .        .
## A_Peabody             .      .           .               .        .
## A_Sherie              .      .           .               .        .
#E(g) is the edge sequence of g
E(g)
## + 7139/7139 edges from 3d4ef0b (vertex names):
##  [1] ___Danno       ->ctvqp           __Dayo         ->KHOU           
##  [3] __Resonance    ->The_Tusker      _1_____________->Reuters        
##  [5] _Alibama       ->BreakingNews    _Arvindh       ->FactHive       
##  [7] _BannedInBoston->Occupied_Nation _bee_buzz_     ->MARGAlade      
##  [9] _chynadoll_    ->WSJ             _CJustice_     ->peoplefor      
## [11] _clairehuxtable->OBABL           _eights        ->jgalicot       
## [13] _Ella_G        ->HuffingtonPost  _filo__        ->TheAtlantic    
## [15] _grandhotel    ->txgdb           _HeavyP        ->BreakingNews   
## [17] _isabelacb     ->bbcnews_ticker  _JBrown22      ->_JMurray23     
## [19] _jesseamaya    ->JLLLOW          _justBITTEN    ->MischterX      
## + ... omitted several edges
#edge attributes are read with $ on the edge sequence; weights of the first 10 edges
E(g)$weight[1:10]
##  [1] 1 1 1 1 1 1 1 1 1 1
#V(g) is the vertex sequence of g
V(g) 
## + 9470/9470 vertices, named, from 3d4ef0b:
##    [1] ___Danno        __Dayo          __Resonance     _1_____________
##    [5] _Alibama        _Arvindh        _BannedInBoston _bee_buzz_     
##    [9] _chynadoll_     _CJustice_      _clairehuxtable _eights        
##   [13] _Ella_G         _filo__         _grandhotel     _HeavyP        
##   [17] _isabelacb      _JBrown22       _jesseamaya     _justBITTEN    
##   [21] _ldeassis_      _Nigerienne_    _Orwell         _PDSweetTarts  
##   [25] _shaymt         _skreetch_      _StuartGray     _TheREBEL111   
##   [29] _TselNoelle_    _YeaSheDarkSkin 0missjones      0Yara0         
##   [33] 100Royd         12030312        123soxfan       124786Ashraf   
##   [37] 147DW           147maks         15say           1789kevin      
## + ... omitted several vertices
#vertex attributes are read with $ on the vertex sequence; names of the first 10 vertices
V(g)$name[1:10]
##  [1] "___Danno"        "__Dayo"          "__Resonance"    
##  [4] "_1_____________" "_Alibama"        "_Arvindh"       
##  [7] "_BannedInBoston" "_bee_buzz_"      "_chynadoll_"    
## [10] "_CJustice_"
#the remaining vertex attributes (columns of the nodes table passed as `vertices`), first 10 vertices
V(g)$out_degree[1:10]
##  [1] 1 1 1 1 1 1 1 1 1 1
V(g)$in_degree[1:10]
##  [1] 0 0 0 0 0 0 0 0 0 0
V(g)$high_indegree[1:10]
##  [1] 0 0 0 0 0 0 0 0 0 0
#uncomment to list every edge / vertex attribute at once
# edge_attr(g)
# vertex_attr(g)

# you can set new attributes using: V(g)$media <- vector

#optional clean-up: remove self-loops while keeping multiple edges
# g <- simplify(g, remove.multiple = F, remove.loops = T) 

visualize

# Compute the vertex coordinates once and keep them in `l` so they can be
# handed to plot(); other layouts: https://igraph.org/r/doc/layout_.html
l <- layout_nicely(graph = g)
# l <- layout_on_grid(g)

# Plot the retweet network. Vertices with in-degree above 29 are drawn larger,
# in red, and labelled with their name; all other vertices are small, grey and
# unlabelled. Edge width is the retweet weight.
popular <- V(g)$in_degree > 29
plot(
  g,
  layout = l,
  vertex.size = ifelse(popular, 1.5, 0.5),
  vertex.color = ifelse(popular, "red", "grey"),
  vertex.label = ifelse(popular, V(g)$name, NA),
  # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
  vertex.label.cex = 1,
  vertex.label.dist = 0.5,
  edge.arrow.size = 0.5,
  edge.arrow.width = 0.5,
  edge.width = E(g)$weight,
  edge.color = "gray"
)

network and node descriptives

#Density: The proportion of present edges from all possible edges in the network.
#FALSE is spelled out because F is an ordinary variable that can be reassigned
edge_density(g, loops = FALSE)
## [1] 7.961287e-05
#the same value by hand for a directed network: edges / (n * (n - 1))
ecount(g) / (vcount(g) * (vcount(g) - 1))
## [1] 7.961287e-05
#Reciprocity: The proportion of reciprocated ties (for a directed network).
reciprocity(g)
## [1] 0.0002801513
#Node degrees and distribution: degree() takes mode "in" for in-degree, "out" for out-degree, and "all" or "total" for total degree.
deg <- degree(g, mode = "all")

summary(deg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.508   1.000 117.000
#the histogram is drawn on log(degree): the summary above shows a median of 1 against a maximum of 117
hist(log(deg), breaks = 50, main = "Histogram of node degree")

#Degree distribution
#TRUE is spelled out because T is an ordinary variable that can be reassigned
deg.dist <- degree_distribution(g, cumulative = TRUE, mode = "all")
plot(x = 0:max(deg), y = 1 - deg.dist, pch = 19, cex = 1.2, col = "orange",
     xlab = "Degree", ylab = "Cumulative Frequency")

#Centrality & centralization
# in_degree = degree(g, mode="in")
# centr_degree(g, mode="in", normalized=T)
# 
# closeness(g, mode="all", weights=NA) 
# centr_clo(g, mode="all", normalized=T) 
# 
# eigen_centrality(g, directed=T, weights=NA)
# centr_eigen(g, directed=T, normalized=T) 
# 
# betweenness(g, directed=T, weights=NA)
# edge_betweenness(g, directed=T, weights=NA)
# centr_betw(g, directed=T, normalized=T)

# Hubs and authorities
# Hub score of every vertex; weights = NA means the edge weights are not used.
hs <- hub_score(graph = g, weights = NA)[["vector"]]
summary(hs)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.01311 0.00000 1.00000
# The 30 largest hub scores, highest first.
sort(hs, decreasing = TRUE)[seq_len(30)]
##      EricaMowry  Rushboes_cigar JacquelynCalver      NickASAVet 
##       1.0000000       0.9982954       0.9895504       0.9692826 
##      TallyAnnaE     AlessCullen         dondanl   Lone_Star_Dem 
##       0.9588446       0.9586887       0.9586146       0.9586146 
##       BukowsKai       shacker56    TomMHarrison   Active_Artist 
##       0.9585424       0.9585424       0.9585424       0.9502951 
##           aglac          AKorst    alexisairvin       AmirZakii 
##       0.9502951       0.9502951       0.9502951       0.9502951 
##  andriconthejob     AngryBroads         AOKiger    BonnieNathan 
##       0.9502951       0.9502951       0.9502951       0.9502951 
##      boomernerd         boyjohn     Brett_Myers       BubbyKatz 
##       0.9502951       0.9502951       0.9502951       0.9502951 
##    caroldeserio       cbaumer10       cbrodrick       crankydem 
##       0.9502951       0.9502951       0.9502951       0.9502951 
##      damelio383          danero 
##       0.9502951       0.9502951
# Redraw the network on the stored layout `l`, marking in red (and larger) the
# vertices whose hub score is above 0.9502951; labels are switched off.
# NOTE(review): the cutoff looks copied from the rounded top-30 printout and is
# compared with `>`, so whether vertices tied at that score are marked depends
# on rounding -- confirm which set is intended.
top_hubs <- hs > 0.9502951
plot(
  g,
  layout = l,
  vertex.size = ifelse(top_hubs, 1.5, 0.2),
  vertex.color = ifelse(top_hubs, "red", "grey"),
  vertex.label = NA,
  # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
  edge.arrow.size = 0.1,
  edge.arrow.width = 0.1,
  edge.width = E(g)$weight / 21,
  edge.color = "gray"
)

# Authority score of every vertex; weights = NA means the edge weights are not used.
as <- authority_score(graph = g, weights = NA)[["vector"]]
summary(as)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0000000 0.0001321 0.0000000 1.0000000
# The 30 largest authority scores, highest first.
sort(as, decreasing = TRUE)[seq_len(30)]
##          LOLGOP  HuffingtonPost  elizabethforma     BarackObama 
##    1.0000000000    0.0523046623    0.0505108573    0.0413084505 
##       peoplefor           Salon     Truthbuster   HuffPostMiami 
##    0.0109722462    0.0090084021    0.0089966623    0.0088326295 
## MatthewCallaway            ALW2           SirrK  Voter99percent 
##    0.0087546142    0.0087546142    0.0086786361    0.0086786361 
##     Foxfieldguy      randiradio       RWwatchMA             WSJ 
##    0.0086786361    0.0027201427    0.0009964087    0.0009016215 
##     HuffPostPol   iSupremeCourt       AriBerman      sahilkapur 
##    0.0008840206    0.0006221957    0.0005828569    0.0005793129 
## BoldProgressive OrganicConsumer             TPM    ProgressMass 
##    0.0005510085    0.0004968238    0.0004943750    0.0004938370 
##     TheLastWord  JeffersonObama          allout         NBCNews 
##    0.0004754221    0.0004710197    0.0004702283    0.0004668465 
## 1BeautifulKarma      TheMsVee83 
##    0.0004665025    0.0004663393
# Redraw the network on the stored layout `l`, marking in red (and larger) the
# vertices whose authority score is above 0.0004668465; labels are switched off.
# NOTE(review): the cutoff looks copied from the rounded top-30 printout and is
# compared with `>`, so whether the vertex at exactly that printed score is
# marked depends on rounding -- confirm which set is intended.
top_authorities <- as > 0.0004668465
plot(
  g,
  layout = l,
  vertex.size = ifelse(top_authorities, 1.5, 0.2),
  vertex.color = ifelse(top_authorities, "red", "grey"),
  vertex.label = NA,
  # vertex.label.color = ifelse(V(g)$media == 1, "red", "black"),
  edge.arrow.size = 0.1,
  edge.arrow.width = 0.1,
  edge.width = E(g)$weight / 21,
  edge.color = "gray"
)

# Subgroups and communities
# Undirected version of g: edges between the same pair of vertices are
# collapsed into one, their weights are summed, and every other edge
# attribute is dropped.
g.sym <- as.undirected(
  g,
  mode = "collapse",
  edge.attr.comb = list(weight = "sum", "ignore")
)

# find cliques
# cliques(g.sym) # list of cliques
# sapply(cliques(g.sym), length) # clique sizes
largest_cliques(graph = g.sym) # cliques with max number of nodes
## [[1]]
## + 3/9470 vertices, named, from c79aca7:
## [1] PositiveLiteCom ruraltweeter    videodouble    
## 
## [[2]]
## + 3/9470 vertices, named, from c79aca7:
## [1] globaltvnews Beari8it     TarSandsTwat
## 
## [[3]]
## + 3/9470 vertices, named, from c79aca7:
## [1] IamTlewis       BanksforJustice MBK_91         
## 
## [[4]]
## + 3/9470 vertices, named, from c79aca7:
## [1] usmanmanzoor  omar_quraishi sami_ravian
#community detection
# ceb <- cluster_edge_betweenness(g) 
# dendPlot(ceb, mode="hclust")

4. references/additional tutorials: